# -*- coding: utf-8 -*-
# ----------------------------
# @Time    : 2021/6/26 下午12:01
# @Author  : changqingai
# @FileName: pyspark_sql.py
# ----------------------------

from pyspark.sql import SparkSession
from pyspark.sql.functions import split, explode

if __name__ == "__main__":
    # Word-count demo over a local text directory, showing the SAME
    # aggregation done two ways: via Spark SQL and via the DataFrame API.
    # Fix: appName had a typo ("pysparl_sql").
    spark = SparkSession.builder.appName("pyspark_sql").master("local[*]").getOrCreate()

    # spark.read.text yields one row per input line, in a single string
    # column named `value`.
    df = spark.read.text("../datasets/wc/")
    # Split each line on single spaces, then explode so each word becomes
    # its own row under the alias `word`.
    words_df = df.select(explode(split(df.value, " ")).alias("word"))

    words_df.printSchema()

    # Style 1 — Spark SQL: register a temp view and aggregate with a query,
    # ordered by frequency descending.
    words_df.createOrReplaceTempView("t_word")
    spark.sql("select word, count(*) counts from t_word group by word order by counts desc ").show()

    # Style 2 — DataFrame API: same aggregation. Fix: order descending
    # (ascending=False) so the output matches the SQL variant above.
    words_df.groupBy("word").count().orderBy("count", ascending=False).show()

    # Fix: release local Spark resources instead of leaking the session.
    spark.stop()
